#setwd("Research/BlueRed/Analysis/Replication")
setwd("C:/Research/Blue Red States/QJPS/Data/Replication/Replication3-8")

library(foreign)
library(arm)

#setwd("Research/BlueRed/Analysis/Replication")
setwd("C:/Research/Blue Red States/QJPS/Data/Replication/Replication3-8")

source("superplot function.R")
load(file="cps.income.Rdata")
results=list()
years=seq(1968,2004,4)

# run exit polls code here
# 2004
exit2004.data <- read.dta("2004_labeled_processed_race.dta", convert.factors=F)

state <- seq(1,50)
region.all <- rep(NA, length(state))
for (i in 1:length(state)){
  temp <- subset(exit2004.data, exit2004.data$state==i)
  region.all[i] <- median(temp$region)
}

state.income.data<-read.dta("avgincome_orig.dta", convert.factors=T)
income.new <- exit2004.data$income-3
y <- exit2004.data$y
state.new <- exit2004.data$state 
region <- region.all[state.new]
data <- cbind(exit2004.data, income.new, y, state.new)
n.state <-length(unique(state.new))
state.income <- as.list(rep(NA,9))
i <- min(state.income.data$st.year)
  while(i <= 2000){
state.income[[((i-min(state.income.data$st.year))/4)+1]] <- state.income.data[state.income.data$st.year==i,]
i <- i + 4
}
avg.income <- log(state.income[[9]]$st.inc10k)
n <- length(y) 
#income.new:  individual-level income
#state.new:  the state index variable
#avg.income:  state-level income (a vector of length 50)
avg.income.expanded <- avg.income[state.new]
fit.2004 <- lmer (y ~ income.new*factor(region) + avg.income.expanded + income.new:avg.income.expanded + factor(region) + (1 + income.new | state.new), family=binomial(link="logit"))
display (fit.2004)
#then, the coefs can be accessed using 
coef(fit.2004)
region.indic <- read.dta("region_dummies.dta", convert.factors=F)
## new: note sl and it's use below in 3 separate instances
sl2004 = as.integer(rownames(coef(fit.2004)[[1]]))
##

A <- as.matrix(coef(fit.2004)$state.new)
B <- array (NA, c(50, ncol(A)))
C <- as.numeric(rownames(A))
for (i in 1:length(C)){
B[C[i],] <- A[i,]
}

income.slope.2004 <- B[,2] + B[,10]*avg.income +
  region.indic[,1]*B[,7] +
  region.indic[,2]*B[,8] + region.indic[,3]*B[,9]

income.intercept.2004 <- B[,1] + B[,6]*avg.income +
  region.indic[,1]*B[,3] +
  region.indic[,2]*B[,4] + region.indic[,3]*B[,5]
 
# 2000
exit2000.data <- read.dta("2000_labeled_processed_race.dta", convert.factors=F)
state.income.data <- read.dta("avgincome_orig.dta",convert.factors=T)
income.new <- exit2000.data$income-3
y <- exit2000.data$y
state.new <- exit2000.data$state
region <- region.all[state.new]
data <- cbind(exit2000.data, income.new, y, state.new)
n.state <-length(unique(state.new))
state.income<-as.list(rep(NA,9))
i <- min(state.income.data$st.year)
  while(i <= 2000){
state.income[[((i-min(state.income.data$st.year))/4)+1]] <- state.income.data[state.income.data$st.year==i,]
i <- i + 4
}
avg.income <- log(state.income[[9]]$st.inc10k)
n <- length(y) 
#income.new:  individual-level income
#state.new:  the state index variable
#avg.income:  state-level income (a vector of length 50)
avg.income.expanded <- avg.income[state.new]
fit.2000 <- lmer (y ~ income.new*factor(region) + avg.income.expanded + income.new:avg.income.expanded  + factor(region) + (1 + income.new | state.new), family=binomial(link="logit"))
display (fit.2000)
#then, the coefs can be accessed using 
coef(fit.2000)
region.indic <- read.dta("region_dummies.dta",convert.factors=F)
## new: note sl and it's use below in 3 separate instances
sl2000 = as.integer(rownames(coef(fit.2000)[[1]]))
##

A <- as.matrix(coef(fit.2000)$state.new)
B <- array (NA, c(50, ncol(A)))
C <- as.numeric(rownames(A))
for (i in 1:length(C)){
B[C[i],] <- A[i,]
}

income.slope.2000 <- B[,2] + B[,10]*avg.income +
  region.indic[,1]*B[,7] +
  region.indic[,2]*B[,8] + region.indic[,3]*B[,9]

income.intercept.2000 <- B[,1] + B[,6]*avg.income +
  region.indic[,1]*B[,3] +
  region.indic[,2]*B[,4] + region.indic[,3]*B[,5]


# 1996
exit1996.data <- read.dta("1996_labeled_processed_race.dta", convert.factors=F)
state.income.data <- read.dta("avgincome_orig.dta",convert.factors=T)
income.new <- exit1996.data$income-3
y <- exit1996.data$y
state.new <- exit1996.data$state 
region <- region.all[state.new]
data <- cbind(exit1996.data, income.new, y, state.new)
n.state <- length(unique(state.new))
state.income <- as.list(rep(NA,9))
i <- min(state.income.data$st.year)
  while(i <= 2000){
state.income[[((i-min(state.income.data$st.year))/4)+1]] <- state.income.data[state.income.data$st.year==i,]
i <- i + 4
}
avg.income<-log(state.income[[8]]$st.inc10k)
n <- length(y) 
#income.new:  individual-level income
#state.new:  the state index variable
#avg.income:  state-level income (a vector of length 50)
avg.income.expanded <- avg.income[state.new]
fit.1996 <- lmer (y ~ income.new*factor(region) + avg.income.expanded + income.new:avg.income.expanded + factor(region) + (1 + income.new | state.new), family=binomial(link="logit"))
display (fit.1996)
#then, the coefs can be accessed using 
coef(fit.1996)
region.indic <- read.dta("region_dummies.dta", convert.factors=F)
## new: note sl and it's use below in 3 separate instances
sl1996 = as.integer(rownames(coef(fit.1996)[[1]]))
##

A <- as.matrix(coef(fit.1996)$state.new)
B <- array (NA, c(50, ncol(A)))
C <- as.numeric(rownames(A))
for (i in 1:length(C)){
B[C[i],] <- A[i,]
}

income.slope.1996 <- B[,2] + B[,10]*avg.income +
  region.indic[,1]*B[,7] +
  region.indic[,2]*B[,8] + region.indic[,3]*B[,9]

income.intercept.1996 <- B[,1] + B[,6]*avg.income +
  region.indic[,1]*B[,3] +
  region.indic[,2]*B[,4] + region.indic[,3]*B[,5]

  
# 1992
exit1992.data <- read.dta("1992_labeled_processed_race.dta", convert.factors=F)
state.income.data <- read.dta("avgincome_orig.dta",convert.factors=T)
income.new <- exit1992.data$income-3
y <- exit1992.data$y
state.new <- exit1992.data$state 
region <- region.all[state.new]
data <- cbind(exit1992.data, income.new, y, state.new)
n.state <- length(unique(state.new))
state.income <- as.list(rep(NA,9))
i <- min(state.income.data$st.year)
  while(i <= 2000){
state.income[[((i-min(state.income.data$st.year))/4)+1]] <- state.income.data[state.income.data$st.year==i,]
i <- i + 4
}
avg.income<-log(state.income[[7]]$st.inc10k)
n <- length(y) 
#income.new:  individual-level income
#state.new:  the state index variable
#avg.income:  state-level income (a vector of length 50)
avg.income.expanded <- avg.income[state.new]
fit.1992 <- lmer (y ~ income.new*factor(region) + avg.income.expanded + income.new:avg.income.expanded + factor(region) + (1 + income.new | state.new), family=binomial(link="logit"))
display (fit.1992)
#then, the coefs can be accessed using 
coef(fit.1992)
region.indic <- read.dta("region_dummies.dta",convert.factors=F)
## new: note sl and it's use below in 3 separate instances
sl1992 = as.integer(rownames(coef(fit.1992)[[1]]))

A <- as.matrix(coef(fit.1992)$state.new)
B <- array (NA, c(50, ncol(A)))
C <- as.numeric(rownames(A))
for (i in 1:length(C)){
B[C[i],] <- A[i,]
}

income.slope.1992 <- B[,2] + B[,10]*avg.income +
  region.indic[,1]*B[,7] +
  region.indic[,2]*B[,8] + region.indic[,3]*B[,9]

income.intercept.1992 <- B[,1] + B[,6]*avg.income +
  region.indic[,1]*B[,3] +
  region.indic[,2]*B[,4] + region.indic[,3]*B[,5]
 
# 1988
exit1988.data <- read.dta("1988_labeled_processed_race.dta", convert.factors=F)
state.income.data <- read.dta("avgincome_orig.dta",convert.factors=T)
income.new <- exit1988.data$income-3
y <- exit1988.data$y
state.new <- exit1988.data$state 
region <- region.all[state.new]
data <- cbind(exit1988.data, income.new, y, state.new)
n.state <- length(unique(state.new))
state.income<-as.list(rep(NA,9))
i <- min(state.income.data$st.year)
  while(i <= 2000){
state.income[[((i-min(state.income.data$st.year))/4)+1]] <- state.income.data[state.income.data$st.year==i,]
i <- i + 4
}
avg.income <- log(state.income[[6]]$st.inc10k)
n <- length(y) 
# income.new:  individual-level income
# state.new:  the state index variable
# avg.income:  state-level income (a vector of length 50)
avg.income.expanded <- avg.income[state.new]
fit.1988 <- lmer (y ~ income.new*factor(region) + avg.income.expanded + income.new:avg.income.expanded + factor(region) + (1 + income.new | state.new), family=binomial(link="logit"))
display (fit.1988)
# then, the coefs can be accessed using 
coef(fit.1988)
region.indic <- read.dta("region_dummies.dta",convert.factors=F)
## new: note sl and it's use below in 3 separate instances
sl1988 = as.integer(rownames(coef(fit.1988)[[1]]))
##

A <- as.matrix(coef(fit.1988)$state.new)
B <- array (NA, c(50, ncol(A)))
C <- as.numeric(rownames(A))
for (i in 1:length(C)){
B[C[i],] <- A[i,]
}

income.slope.1988 <- B[,2] + B[,10]*avg.income +
  region.indic[,1]*B[,7] +
  region.indic[,2]*B[,8] + region.indic[,3]*B[,9]

income.intercept.1988 <- B[,1] + B[,6]*avg.income +
  region.indic[,1]*B[,3] +
  region.indic[,2]*B[,4] + region.indic[,3]*B[,5]


# 1984
exit1984.data <- read.dta("1984_labeled_processed_race.dta", convert.factors=F)
state.income.data<-read.dta("avgincome_orig.dta",convert.factors=T)
income.new <- exit1984.data$income-3
y <- exit1984.data$y
state.new <- exit1984.data$state 
region <- region.all[state.new]
data <- cbind(exit1984.data, income.new, y, state.new)
n.state <- length(unique(state.new))
state.income <- as.list(rep(NA,9))
i <- min(state.income.data$st.year)
  while(i <= 2000){
state.income[[((i-min(state.income.data$st.year))/4)+1]] <- state.income.data[state.income.data$st.year==i,]
i <- i + 4
}
avg.income<-log(state.income[[5]]$st.inc10k)
n <- length(y) 
# income.new:  individual-level income
# state.new:  the state index variable
# avg.income:  state-level income (a vector of length 50)
avg.income.expanded <- avg.income[state.new]
fit.1984 <- lmer (y ~ income.new*factor(region) + avg.income.expanded + income.new:avg.income.expanded + factor(region) + (1 + income.new | state.new), family=binomial(link="logit"))
display (fit.1984)
# then, the coefs can be accessed using 
coef(fit.1984)
# region.indic <- read.dta("region_dummies_84.dta", convert.factors=F)
region.indic <- read.dta("region_dummies.dta", convert.factors=F)
## new: note sl and it's use below in 3 separate instances
# sl1984 = as.integer(rownames(coef(fit.1984)[[1]]))
sl1984 = state

A <- as.matrix(coef(fit.1984)$state.new)
B <- array (NA, c(50, ncol(A)))
C <- as.numeric(rownames(A))
for (i in 1:length(C)){
B[C[i],] <- A[i,]
}

income.slope.1984 <- B[,2] + B[,10]*avg.income +
  region.indic[,1]*B[,7] +A <- as.matrix(coef(fit.1988)$state.new)
B <- array (NA, c(50, ncol(A)))
C <- as.numeric(rownames(A))
for (i in 1:length(C)){
B[C[i],] <- A[i,]
}

income.slope.1984 <- B[,2] + B[,10]*avg.income +
  region.indic[,1]*B[,7] +
  region.indic[,2]*B[,8] + region.indic[,3]*B[,9]

income.intercept.1984 <- B[,1] + B[,6]*avg.income +
  region.indic[,1]*B[,3] +
  region.indic[,2]*B[,4] + region.indic[,3]*B[,5]

 
# superplot
results[[10]]=cbind(income.intercept.2004,income.slope.2004); rownames(results[[10]])=sl2004
results[[9]]=cbind(income.intercept.2000,income.slope.2000); rownames(results[[9]])=sl2000
results[[8]]=cbind(income.intercept.1996,income.slope.1996); rownames(results[[8]])=sl1996
results[[7]]=cbind(income.intercept.1992,income.slope.1992); rownames(results[[7]])=sl1992
results[[6]]=cbind(income.intercept.1988,income.slope.1988); rownames(results[[6]])=sl1988
results[[5]]=cbind(income.intercept.1984,income.slope.1984); rownames(results[[5]])=sl1984

superp=superplot(results, start=5, end=10, rows=3, columns=2, indiv=1, lmer=1, var.slope=1, lowbound=-2, hibound=2, xlabels=c(9,10), ylabels=c(5,7,9), sf=4)

savePlot("Fig6", type=c("pdf"))
savePlot("Fig6", type=c("ps"))
savePlot("Fig6", type=c("eps"))

# avg income
state.income<-as.list(rep(NA,9))
i <- min(state.income.data$st.year)
  while(i <= 2000){
state.income[[((i-min(state.income.data$st.year))/4)+1]] <- state.income.data[state.income.data$st.year==i,]
i <- i + 4
}
avg.income.2004<-(state.income[[9]]$st.income)
avg.income.2000<-(state.income[[9]]$st.income)
avg.income.1996<-(state.income[[8]]$st.income)
avg.income.1992<-(state.income[[7]]$st.income)
avg.income.1988<-(state.income[[6]]$st.income)
avg.income.1984<-(state.income[[5]]$st.income)

# slopes
par(mfrow=c(3,2))
par(mar=c(3.5, 3.5, 3, .5))

#par(mar=c(3, 3.75, 3, .5)) # bottom, left, top, right
state.abb.1984<-state.abb[sl1984]
x <- cbind(avg.income.1984[sl1984], income.slope.1984)
x1 <- subset(x, x[,2]!="NA")
plot(range(x1[,1]),c(0,.65), main=1984, cex.lab=1.5, cex.main=1.75, type="n", ylab="Slope", xlab="", cex.axis=1.5, yaxt="n", xaxt="n", mgp=c(2.5,.5,0))
axis(side=2,seq(0,.6,.2),cex.axis=1.5)
axis(side=1, c(15000,25000), c("$15,000", "$25,000"), cex.axis=1.5)
text(avg.income.1984[sl1984], income.slope.1984, label=state.abb.1984)
lines(lowess(x1[,1], x1[,2]))
# lines(lowess(avg.income.1984[sl1984],income.slope.1984))
abline (0,0,col="gray")

plot(range((avg.income.1988[sl1988])),c(0,.65), main=1988, cex.lab=1.5, cex.main=1.75, type="n", ylab="", xlab="", cex.axis=1.5, yaxt="n", xaxt="n", mgp=c(2.5,.5,0))
axis(side=2,seq(0,.6,.2),cex.axis=1.5)
axis(side=1,c(20000,30000),c("$20,000", "$30,000"),cex.axis=1.5)
text((avg.income.1988),income.slope.1988, label=state.abb)
lines(lowess((avg.income.1988),income.slope.1988))
abline (0,0,col="gray")

plot(range((avg.income.1992[sl1992])),c(0,.65), main=1992, cex.lab=1.5, cex.main=1.75, type="n", ylab="Slopes", xlab="", cex.axis=1.5, yaxt="n", xaxt="n", mgp=c(2.5,.5,0))
axis(side=2,seq(0,.6,.2),cex.axis=1.5)
axis(side=1,c(20000,30000),c("$20,000", "$30,000"),cex.axis=1.5)
text(avg.income.1992,income.slope.1992, label=state.abb)
lines(lowess((avg.income.1992),income.slope.1992))
abline (0,0,col="gray")

plot(range((avg.income.1996[sl1996])),c(0,.65), main=1996, cex.lab=1.5, cex.main=1.75, type="n", ylab="", xlab="", cex.axis=1.5, yaxt="n", xaxt="n", mgp=c(2.5,.5,0))
axis(side=2,seq(0,.6,.2),cex.axis=1.5)
axis(side=1,c(20000,30000),c("$20,000", "$30,000"),cex.axis=1.5)
text(avg.income.1996,income.slope.1996, label=state.abb)
lines(lowess((avg.income.1996),income.slope.1996))
abline (0,0,col="gray")

plot(range((avg.income.2000[sl2000])),c(0,.65), main=2000, cex.lab=1.5, cex.main=1.75, type="n", ylab="Slopes", xlab="Median income within state", cex.axis=1.5, yaxt="n", xaxt="n", mgp=c(2.5,.5,0))
axis(side=2,seq(0,.6,.2),cex.axis=1.5)
axis(side=1,c(20000,30000),c("$20,000", "$30,000"),cex.axis=1.55)
text(avg.income.2000,income.slope.2000, label=state.abb)
lines(lowess(avg.income.2000,income.slope.2000))
abline (0,0,col="gray")

plot(range((avg.income.2004[sl2004])),c(0,.65), main=2004, cex.lab=1.5, cex.main=1.75, type="n", ylab="", xlab="Median income within state", cex.axis=1.5, yaxt="n", xaxt="n", mgp=c(2.5,.5,0))
axis(side=2,seq(0,.6,.2),cex.axis=1.5)
axis(side=1,c(20000,30000),c("$20,000", "$30,000"),cex.axis=1.5)
text(avg.income.2004,income.slope.2004, label=state.abb)
lines(lowess(avg.income.2004,income.slope.2004))
abline (0,0,col="gray")

savePlot("Fig8", type=c("pdf"))
savePlot("Fig8", type=c("ps"))
savePlot("Fig8", type=c("eps"))

# intercepts
par(mfrow=c(3,2))
par(mar=c(3.5, 3.5, 3, .5))

state.abb.1984<-state.abb[sl1984]
x <- cbind(avg.income.1984[sl1984], income.intercept.1984)
x1 <- subset(x, x[,2]!="NA")
plot(range(x1[,1]), c(-1,1.25), main=1984, cex.lab=1.5, cex.main=1.75, type="n", ylab="Intercepts", xlab="", cex.axis=1.5, yaxt="n", xaxt="n", mgp=c(2.5,.5,0))
axis(side=2,c(-1,0,1),cex.axis=1.5)
axis(side=1,c(15000, 25000),c("$15,000", "$25,000"), cex.axis=1.5)
text(avg.income.1984[sl1984], income.intercept.1984, label=state.abb.1984)
lines(lowess(x1[,1], x1[,2]))
abline (0,0,col="gray")

x <- cbind(avg.income.1988[sl1988], income.intercept.1988)
x1 <- subset(x, x[,2]!="NA")
plot(range(x1[,1]),c(-1,1.25), main=1988, cex.lab=1.5, cex.main=1.75, type="n", ylab="", xlab="", cex.axis=1.5, yaxt="n", xaxt="n", mgp=c(2.5,.5,0))
axis(side=2,c(-1,0,1),cex.axis=1.5)
axis(side=1,c(20000,30000),c("$20,000", "$30,000"),cex.axis=1.5)
text((avg.income.1988),income.intercept.1988, label=state.abb)
lines(lowess(x1[,1], x1[,2]))
abline (0,0,col="gray")

plot(range((avg.income.1992[sl1992])),c(-1,1.25), main=1992, cex.lab=1.5, cex.main=1.75, type="n", ylab="Intercepts", xlab="", cex.axis=1.5, yaxt="n", xaxt="n", mgp=c(2.5,.5,0))
axis(side=2,c(-1,0,1),cex.axis=1.5)
axis(side=1,c(20000,30000),c("$20,000", "$30,000"),cex.axis=1.5)
text((avg.income.1992),income.intercept.1992, label=state.abb)
lines(lowess(avg.income.1992,income.intercept.1992))
abline (0,0,col="gray")

plot(range((avg.income.1996[sl1996])),c(-1,1.25), main=1996, cex.lab=1.5, cex.main=1.75, type="n", ylab="", xlab="", cex.axis=1.5, yaxt="n", xaxt="n", mgp=c(2.5,.5,0))
axis(side=2,c(-1,0,1),cex.axis=1.5)
axis(side=1,c(20000,30000),c("$20,000", "$30,000"),cex.axis=1.5)
text((avg.income.1996),income.intercept.1996, label=state.abb)
lines(lowess(avg.income.1996,income.intercept.1996))
abline (0,0,col="gray")

plot(range((avg.income.2000[sl2000])),c(-1,1.25), main=2000, cex.lab=1.5, cex.main=1.75, type="n", ylab="Intercepts", xlab="Median income within state", cex.axis=1.5, yaxt="n", xaxt="n", mgp=c(2.5,.5,0))
axis(side=2,c(-1,0,1),cex.axis=1.5)
axis(side=1,c(20000,30000),c("$20,000", "$30,000"),cex.axis=1.5)
text((avg.income.2000),income.intercept.2000, label=state.abb)
lines(lowess(avg.income.2000,income.intercept.2000))
abline (0,0,col="gray")

plot(range((avg.income.2004[sl2004])),c(-1,1.25), main=2004, cex.lab=1.5, cex.main=1.75, type="n", ylab="", xlab="Median income within state", cex.axis=1.5, yaxt="n", xaxt="n", mgp=c(2.5,.5,0))
axis(side=2,c(-1,0,1),cex.axis=1.5)
axis(side=1,c(20000,30000),c("$20,000", "$30,000"),cex.axis=1.5)
text((avg.income.2004),income.intercept.2004, label=state.abb)
lines(lowess(avg.income.2004,income.intercept.2004))
abline (0,0,col="gray")

savePlot("Fig7", type=c("pdf"))
savePlot("Fig7", type=c("ps"))
savePlot("Fig7", type=c("eps"))
